# -*- coding:UTF-8 -*-
import json
import time

from bson import ObjectId

from console_pipeline import ConsolePipeline
from mongo_pipeline import MongoDBPipeline
from spider import Spider
from util.date_util import DateUtil
from jsonpath_rw import jsonpath, parser


class BilibiliSpider:
    """Crawler logic for the Bilibili video-info API.

    Generates one API URL per video ``aid`` and, for each response,
    yields an item dict for popular videos (>500k views) or ``None``.
    Designed to be driven by the project's ``Spider`` framework.
    """

    # Framework-facing attributes; ``start_url`` is replaced with a
    # generator in __init__, ``headers`` is left unset (framework default).
    start_url = []
    headers = None

    def __init__(self):
        self.start_url = self.__generate_url()

    def __generate_url(self):
        """Lazily yield the API URL for every aid in the crawl range."""
        for aid in range(32790000, 32899110):
            yield "https://api.bilibili.com/x/web-interface/view?aid=%d" % aid

    def process(self, html):
        """Parse one JSON API response.

        :param html: raw JSON response body (str).
        :yields: an item dict for videos with more than 500k views,
                 otherwise ``None``.
        """
        result = json.loads(html)
        # Bind the nested payload once; fall back to empty dicts so a
        # malformed success response (missing "data"/"stat") yields None
        # instead of raising AttributeError.
        data = result.get("data") or {}
        stat = data.get("stat") or {}
        items = None
        # Only record videos with more than 500k (50W) views.
        if result.get("code") == 0 and int(stat.get("view", 0)) > 500000:
            ts = int(data.get("ctime"))
            items = {
                "_id": ObjectId(),
                "title": data.get("title"),
                "tid": data.get("tid"),
                "tname": data.get("tname"),
                "ctime": ts,
                "publish_time": DateUtil.formatFromTimeStamp(ts, patten="%Y-%m-%d %H:%M:%S"),
                "desc": data.get("desc"),
                "view": stat.get("view"),
                "danmaku": stat.get("danmaku"),
                "favorite": stat.get("favorite"),
            }
        # Light throttle so the crawler does not hammer the API.
        time.sleep(0.005)
        yield items


if __name__ == "__main__":
    # Launch the crawl with a large thread pool; items flow through the
    # console pipeline and are persisted to MongoDB (db "video",
    # collection "bilibili").
    options = {"multiplethread": True, "queueTimeOut": 2, "threadCount": 500}
    (
        Spider(BilibiliSpider(), threadoptions=options)
        .addPipeline(ConsolePipeline())
        .addPipeline(MongoDBPipeline("video", "bilibili"))
        .start()
    )
